#delimit ;
clear;
*use as input: oil_prices.dta - the output from the oil price calculation in R (2);
*generate normalized versions of variables before calculating factor loadings;
*for apigravity, depth, and field_age but not offshore (which is binary);
use "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_prices.dta";
sum avgapigravity;
sum avgdepth;
sum field_age;
*reverse is the avgapigravity variable, reverse coded so that higher values indicate;
*heavier petroleum, which is more difficult to extract --> reverse = max(avgapigravity) - avgapigravity;
sum reverse;

*clear out the variables
replace sd_apigravity=.;
replace sd_depth=.;
replace sd_age=.;
replace sd_reverse=.;
replace api01=.;
replace depth01=.;
replace age01=.;
replace reverse01=.;

*create field level difficulty variables
*diffx = additive index of difficulty components recoded to the [0,1] interval;
gen diffx=.;
*diffx_sd = additive index of standardized difficulty components, mean centered;
gen diffx_sd=.;

replace sd_apigravity = avgapigravity/9.521218;
replace sd_depth = avgdepth/3240.675;
replace sd_age = field_age/20.90374;
replace sd_reverse = reverse/9.521218;
replace api01 = (avgapigravity-0.4)/(89-0.4);
replace depth01 = (avgdepth - 140)/(19723-140);
replace age01 = field_age/164;
replace reverse01 = reverse/88.6;

*mean center the variables
sum sd_reverse;
generate sd_reverse_mc = sd_reverse - 5.767173;
generate sd_apigravity_mc = sd_apigravity - 3.58037;
generate sd_age_mc = sd_age - 1.306035;
generate sd_depth_mc = sd_depth - 2.165296;


*generate difficulty scores;
replace diffx = reverse01+depth01+offshore;
replace diffx_sd = sd_reverse_mc + sd_depth_mc + offshore;

*difficulty * production at the field level;
gen diffx_oil=.;
gen diffx_sd_oil=.;


*prepare to create weighted averages;

replace diffx_oil = diffx*oil;
replace diffx_sd_oil = diffx_sd*oil;

*save output before aggregation to country level;
save "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_prices.dta", replace;

*aggregate to country level;
collapse (sum) diffx_oil diffx_sd_oil oil dollars_by_field, by (country years);

*now working in a country-level environment;
save "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_country_level1.dta", replace;

*construct weighted averages (sum of prod*difficulty at field level by country)/(total production by country);
generate wdiffx=diffx_oil/oil;
generate wdiffx_sd = diffx_sd_oil/oil;


*prepare to construct a weighted average to aggregate individual emirates into the UAE;

generate wdiffx_oil = wdiffx*oil;
generate wdiffx_sd_oil = wdiffx_sd*oil;

*save results;
save "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_country_level1.dta", replace;

*aggregate individual emirates up to uae;

collapse (sum) oil wdiffx_oil wdiffx_sd_oil dollars_by_field if (country=="abu dhabi" | country=="dubai" | country=="ras al khaimah" | country=="sharjah"), by (years);

gen wdiffx = wdiffx_oil/oil;
gen wdiffx_sd = wdiffx_sd_oil/oil;
gen country="uae";

save "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_uae.dta", replace;

use "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_country_level1.dta";

*append the aggregated data for the UAE to the main dataset;
append using "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_uae.dta";

*prepare data for merging with main dataset by matching country names and time periods;

*drop individual emirates out of dataset now that UAE as a whole is included;
drop if country=="abu dhabi" | country=="dubai" | country=="ras al khaimah" | country=="sharjah";

*drop countries not included in oil dataset, set country names to match oil dataset where there is a mismatch or typo;
drop if country=="barbados";
drop if country=="brunei";
replace country="congo brazzaville" if country=="congo (brazzaville)";
replace country="congo kinshasa" if country=="congo (formerly zaire)";
list years oil if country=="south korea";
replace country="korea south" if country=="south korea";
list years oil if country=="belize";
drop if country=="belize";
list years oil if country=="kyrgyzstan";
drop if country=="kyrgyzstan" & years<1991;
drop if country=="russia" & years<1991;
replace country="trinidad and tobago" if country=="trinidad & tobago";
drop if country=="russia" & years<1992;
list years oil if country=="yemen";
drop if country=="yemen" & years<1990;
replace country="argentina" if country=="argenti";
replace country="canada" if country=="cada";
replace country="china" if country=="chi";
drop if country=="chi-taiwan" | country=="chitaiwan";
replace country="ghana" if country=="gha";
drop if country=="neutral zone";
replace country="suriname" if country=="surime";
replace country="vietnam" if country=="vietm";

save "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_country_level_for_merge.dta", replace;
use "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil and democracy full dataset with diffusion current1.dta";

*add on the oil and difficulty data;

merge 1:1 country years using "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\oil_output_and_difficulty_country_level_for_merge.dta", keepusing (oil dollars_by_field wdiffx wdiffx_sd);
drop _merge;

*add min value to difficulty for countries with no oil (so that they aren't listwise deleted in analysis);
gen wdiffx_0 = wdiffx;
replace wdiffx_0 = 0.7738188 if wdiffx==.;

gen wdiffx_sd_0 = wdiffx_sd;
replace wdiffx_sd_0 = -2.100509 if wdiffx_sd_0==.;

*per-capitize oil output, and include zeros;
gen oil_pc = oil/pop;
replace oil_pc = 0 if oil_pc ==.;

gen oil_revenue_pc = dollars_by_field/pop;
replace oil_revenue_pc = 0 if oil_revenue_pc ==.;


egen cnmr = group(country);
xtset cnmr years;
gen firstdiff=d1.polity2_original;

save "C:\Users\kurtz.61\Dropbox\marcus_oil\replication_code\programs\replication_data.dta", replace;
 



